rm(list = ls())

# create log file
con_log <- file("output/ep-second-order.log")
sink(con_log)
sink(con_log, type="message")

# Note: R script based on a previous analysis and therefore still relying on older packages -- esp. plyr
format(Sys.time(), '%d %B %Y, %H:%M')

if("package:dplyr" %in% search()) detach("package:dplyr", character.only = TRUE)
pkgs <- c('ggplot2', 'lmtest', 'plyr', 'rjson', 'RSQLite', 'sandwich', 'stargazer')
reqs <- vapply(pkgs, suppressMessages(require), character.only = TRUE, FUN.VALUE = logical(1))

# read ParlGov main data tables
con <- dbConnect(dbDriver('SQLite'), dbname='parlgov-2015-stable.db')
# dbListTables(con)

country <- dbReadTable(con, 'country')
party <- dbReadTable(con, 'view_party')
elec <- dbReadTable(con, 'view_election')
cab <- dbReadTable(con, 'view_cabinet')
dbDisconnect(con)

# read data about EU accessions -- json data from ParlGov json table
ep.json <- '[{"date": "1979-06-10", "seats_total": 410, "type_id": 29}, {"date": "1981-01-01", "seats_total": 434, "type_id": 18}, {"date": "1984-06-17", "seats_total": 434, "type_id": 29}, {"date": "1986-01-01", "seats_total": 518, "type_id": 18}, {"date": "1989-06-18", "seats_total": 518, "type_id": 29}, {"date": "1994-06-12", "seats_total": 518, "type_id": 29}, {"date": "1995-01-01", "seats_total": 626, "type_id": 18}, {"date": "1999-06-13", "seats_total": 626, "type_id": 29}, {"date": "2004-06-13", "seats_total": 732, "type_id": 29}, {"date": "2009-06-07", "seats_total": 736, "type_id": 29}, {"date": "1996-10-20", "seats_total": 626, "type_id": 7}, {"date": "1995-10-09", "seats_total": 628, "type_id": 7}, {"date": "1981-10-18", "seats_total": 434, "type_id": 7}, {"date": "1987-09-13", "seats_total": 518, "type_id": 7}, {"date": "1987-07-05", "seats_total": 518, "type_id": 7}, {"date": "2007-01-01", "seats_total": 785, "type_id": 18}, {"date": "2007-05-20", "seats_total": 785, "type_id": 7}, {"date": "2007-11-25", "seats_total": 785, "type_id": 7}]'
ep.info <- do.call(rbind.data.frame, fromJSON(ep.json))

# create data set for EP election results and cabinet status
ep.accession <- subset(ep.info, type_id == 18, date)
ep <- subset(elec, subset = election_type == 'ep' & ! election_date %in% ep.accession$date)
ep$merge.id <- paste(ep$previous_cabinet_id, ep$party_id)
# add cabinet parties
cab$merge.id <- paste(cab$cabinet_id, cab$party_id)
cab.merge <- cab[ , c('merge.id', 'cabinet_party')]
ep <- merge(ep, cab.merge, by = 'merge.id', all.x = TRUE)
ep$merge.id <- paste(ep$previous_parliament_election_id, ep$party_id)

# create data set for national election results
parl <- subset(elec, election_type == 'parliament'
                     & election_id %in% ep$previous_parliament_election_id,
               c(country_name_short:party_name_english,
                 election_id, party_id) )
parl$merge.id <- paste(parl$election_id, parl$party_id)

# get duration since last parliamentary election
ep.merge <- subset(ep, !duplicated(election_id),
                   c(election_date, previous_parliament_election_id) )
ep.merge <- plyr::rename(ep.merge, c('election_date' = 'election_date_ep',
                         'previous_parliament_election_id' = 'election_id') )
parl.merge <- subset(parl, ! duplicated(election_id) , c(election_id, election_date) )
dur <- merge(ep.merge, parl.merge, by = 'election_id', all.x = TRUE)
dur$duration <- as.Date(dur$election_date_ep, '%Y-%m-%d') - as.Date(dur$election_date, '%Y-%m-%d')
dur <- dur[ , c('election_id', 'duration')]

# combine national and EP election results
parl.merge <- parl[ , c('merge.id', 'vote_share')]
ep.merge <- ep[ , c('merge.id', 'party_id', 'vote_share', 'cabinet_party')]
ep.merge <- rename(ep.merge, c('vote_share' = 'vote_share_ep',
                               c('party_id' = 'party_id_ep')) )
out <- merge(parl.merge, ep.merge, by = 'merge.id', all = TRUE)

# not all entries have merge matches in both data sets
# extract party and election ids for all observations
out$election_id <- as.integer(sapply(strsplit(out$merge.id, ' '), function(x) x[1]))
out$party_id <- as.integer(sapply(strsplit(out$merge.id, ' '), function(x) x[2]))

# add duration data
out <- merge(out, dur, by = 'election_id', all.x = TRUE)
# set 'cabinet_party' to zero for parties not in parliament
out[is.na(out$cabinet_party), 'cabinet_party'] <- 0
# set 'vote_share' to zero for parties not in parliament or EP results
out[is.na(out$vote_share), 'vote_share'] <- 0
out[is.na(out$vote_share_ep), 'vote_share_ep'] <- 0
out$vote_diff <- out$vote_share_ep - out$vote_share
# add election date
ep.merge <- subset(ep, !duplicated(previous_parliament_election_id),
                   c(previous_parliament_election_id, election_date))
ep.merge <- rename(ep.merge, c('previous_parliament_election_id' = 'election_id') )
out <- merge(out, ep.merge, by = 'election_id', all.x = TRUE)

party.merge <- subset(party, select = c(party_id, country_name_short:eu_anti_pro) )
out <- merge(party.merge, out, by = 'party_id', all.y = TRUE)

out.file <- 'output/ep-second-order.csv'
write.csv(out, out.file, na='', row.names = FALSE)


## Replication with ParlGov data

# Hix, Simon, and Michael Marsh. 2007. Punishment or protest?
# Understanding European Parliament elections" Journal of Politics
# 69(2): 495-510.

# use Hix/March variable names
hm <- out[out$vote_share_ep > 0.0 , ]  # include only parties winning votes in analysis
hm <- rename(hm, c('country_name_short'='country', 'cabinet_party'='government',
                   'family_name'='party.family', 'vote_diff'='gain', 'vote_share'='size'))
hm$year <- as.integer(substr(hm$election_date, 1, 4))
hm$early <- as.integer(hm$duration < 365)  # approximate by first year after election
hm$new.party <- as.integer(hm$size == 0)

# add dichotomous variable for new EU members
hm$eu.new <- hm$country_name %in% country[country$eu_accession_date > '2000-01-01', 'name']

# calculate party positional data -- rescale to -10/10 scale
hm <- plyr::rename(hm, c('left_right'='left.right', 'eu_anti_pro'='anti.pro.eu'))
hm$left.right <- hm$left.right * 2 - 10  # rescale  to 20 points scale
hm$left.right.extreme <- hm$left.right^2  # approximate extremism -- party distance from center squared
hm$anti.pro.eu <- hm$anti.pro.eu * 2 - 10  # rescale  to 20 points scale
hm$anti.pro.eu.extreme <- hm$anti.pro.eu^2  # approximate extremism -- party distance from center squared

# approximate dichotomous anti-EU variable
hm$anti.eu <- as.integer(hm$anti.pro.eu < -0.5)

# calculate polynomials
hm$size.2 <- hm$size^2
hm$size.3 <- hm$size^3

# Hix/March (2007, 502) -- Figure 1 -- all countries and years
hm.pl <- hm[hm$new.party == 0 , ]  # exclude new parties
hm.pl$government <- factor(hm.pl$government, levels = c(1, 0), labels = c('yes', 'no'))
pl <- ggplot(hm.pl, aes(x=size, y=gain, shape=government)) +
  geom_point() + geom_smooth() + theme_bw()
print(pl)  # export 700*500 px
ggsave(plot = pl, file = 'output/figure-2.png',  width = 7, height = 5, dpi = 300)


## Replication of models

hm$size.government <- hm$size * hm$government
hm$government.early <- hm$government * hm$early

# write.csv(hm, 'hix_march_2007.csv', na='', row.names = FALSE)

lm_robust <- function(lm_formula, lm_data = NA, hc_type = "HC0") {
  model <- lm(lm_formula, data = lm_data)
  print(sprintf('n = %d --  adj. R2 = %.2f', nobs(model), summary(model)$adj.r.squared))
  coeftest(model, vcov = vcovHC(model, type = hc_type))  # HC0, sandwich; HC1 Stata 'robust'
}

# Hix/March (2007, 504) -- Table 3 -- Model 7

lm.formula <- formula("gain ~ size + size.2 + size.3 + size.government + early + government.early + left.right + left.right.extreme + anti.pro.eu + anti.pro.eu.extreme + new.party")

lm.data <- hm[hm$election_date < '2005-01-01' & hm$eu.new == 0, ]
m.7.hm <- lm_robust(lm.formula, lm.data)  # Hix/March Model 7 -- 1979-2004 EU old

m.7.eu.old <- lm_robust(lm.formula, hm[hm$eu.new == 0, ])  # Hix/March Model 7 -- 1979-2014 EU old
m.7.eu.new <- lm_robust(lm.formula, hm[hm$eu.new == 1, ])  # Hix/March Model 7 -- 2004-2014 EU new
m.7.eu.all <- lm_robust(lm.formula, hm)  # Hix/March Model 7 -- 1979-2014 EU all

m.7.all <- list(m.7.hm, m.7.eu.old, m.7.eu.new, m.7.eu.all)
stargazer_param <- list(type = 'text', digits = 2,  star.cutoffs = c(0.05, 0.01), out = 'output/table-2.html')
do.call(stargazer, append(m.7.all, stargazer_param))


# R session info and restore output to console
sessionInfo()
sink() 
sink(type="message")

closeAllConnections()

